// Package org.apache.lucene.facet.taxonomy
//
// Source Code of org.apache.lucene.facet.taxonomy.TestTaxonomyCombined

package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.SlowRAMDirectory;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.junit.Test;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// TODO: remove this suppress if we fix the TaxoWriter Codec to a non-default (see todo in DirTW)
@SuppressCodecs({"SimpleText","Lucene3x"})
public class TestTaxonomyCombined extends FacetTestCase {

  /**
   * The category paths that fillTaxonomy() adds to the taxonomy, in exactly
   * this order. Each row is one path, listed from the top-level component
   * down to the leaf. Rows have different depths, parents are never listed
   * on their own before their children, and one row ("Date", "2006") names
   * a category that an earlier, deeper row has already implied. The last row
   * uses non-ASCII characters written as unicode escapes.
   */
  private final static String[][] categories = {
    { "Author", "Tom Clancy" },
    { "Author", "Richard Dawkins" },
    { "Author", "Richard Adams" },
    { "Price", "10", "11" },
    { "Price", "10", "12" },
    { "Price", "20", "27" },
    { "Date", "2006", "05" },
    { "Date", "2005" },
    { "Date", "2006" },
    { "Subject", "Nonfiction", "Children", "Animals" },
    { "Author", "Stephen Jay Gould" },
    { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
  };
 
  /**
   * Expected ordinals for the rows of {@code categories}: row i lists the
   * ordinal of every component along the path categories[i], from the
   * top-level component down to the leaf.
   * <p>
   * TaxonomyWriter.addCategory() only returns the ordinal of the category
   * itself, not the whole path, so fillTaxonomy() compares only the last
   * element of each row with the returned value; the leading elements are
   * not checked there.
   */
  private final static int[][] expectedPaths = {
    { 1, 2 },
    { 1, 3 },
    { 1, 4 },
    { 5, 6, 7 },
    { 5, 6, 8 },
    { 5, 9, 10 },
    { 11, 12, 13 },
    { 11, 14 },
    { 11, 12 },
    { 15, 16, 17, 18 },
    { 1, 19 },
    { 1, 20 }
  };

  /**
   * The categories the taxonomy index is expected to contain after
   * fillTaxonomy() has run, in order of increasing ordinal: the index of a
   * row in this array is the ordinal the tests expect for that category.
   * Note how parent categories are added automatically (and receive their
   * ordinal first) when a subcategory is added. Row 0 is the root category,
   * represented by an empty path.
   */
  private final static String[][] expectedCategories = {
    { }, // the root category
    { "Author" },
    { "Author", "Tom Clancy" },
    { "Author", "Richard Dawkins" },
    { "Author", "Richard Adams" },
    { "Price" },
    { "Price", "10" },
    { "Price", "10", "11" },
    { "Price", "10", "12" },
    { "Price", "20" },
    { "Price", "20", "27" },
    { "Date" },
    { "Date", "2006" },
    { "Date", "2006", "05" },
    { "Date", "2005" },
    { "Subject" },
    { "Subject", "Nonfiction" },
    { "Subject", "Nonfiction", "Children" },
    { "Subject", "Nonfiction", "Children", "Animals" },
    { "Author", "Stephen Jay Gould" },
    { "Author", "\u05e0\u05d3\u05d1\u3042\u0628" },
  };

  /**  fillTaxonomy adds the categories in the categories[] array, and asserts
    that the additions return exactly the ordinals (in the past - paths)
    specified in expectedPaths[].
    Note that this assumes that fillTaxonomy() is called on an empty taxonomy
    index. Calling it after something else was already added to the taxonomy
    index will surely have this method fail.
   */
  public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
    for (int i = 0; i < categories.length; i++) {
      int ordinal = tw.addCategory(new CategoryPath(categories[i]));
      int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
      if (ordinal!=expectedOrdinal) {
        fail("For category "+showcat(categories[i])+" expected ordinal "+
            expectedOrdinal+", but got "+ordinal);
      }
    }
  }

  public static String showcat(String[] path) {
    if (path==null) {
      return "<null>";
    }
    if (path.length==0) {
      return "<empty>";
    }
    if (path.length==1 && path[0].length()==0) {
      return "<\"\">";
    }
    StringBuilder sb = new StringBuilder(path[0]);
    for (int i=1; i<path.length; i++) {
      sb.append('/');
      sb.append(path[i]);
    }
    return sb.toString();
  }

  private String showcat(CategoryPath path) {
    if (path==null) {
      return "<null>";
    }
    if (path.length==0) {
      return "<empty>";
    }
    return "<"+path.toString('/')+">";
  }

  /**  Basic tests for TaxonomyWriter. Basically, we test that
    IndexWriter.addCategory works, i.e. returns the expected ordinals
    (this is tested by calling the fillTaxonomy() method above).
    We do not test here that after writing the index can be read -
    this will be done in more tests below.
   */
  @Test
  public void testWriter() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
    // is what we expect it to be.
    assertEquals(expectedCategories.length, tw.getSize());
    tw.close();
    indexDir.close();
  }

  /**  testWriterTwice is exactly like testWriter, except that after adding
    all the categories, we add them again, and see that we get the same
    old ids again - not new categories.
   */
  @Test
  public void testWriterTwice() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    // run fillTaxonomy again - this will try to add the same categories
    // again, and check that we see the same ordinal paths again, not
    // different ones.
    fillTaxonomy(tw);
    // Let's check the number of categories again, to see that no
    // extraneous categories were created:
    assertEquals(expectedCategories.length, tw.getSize());   
    tw.close();
    indexDir.close();
  }

  /**  testWriterTwice2 is similar to testWriterTwice, except that the index
    is closed and reopened before attempting to write to it the same
    categories again. While testWriterTwice can get along with writing
    and reading correctly just to the cache, testWriterTwice2 checks also
    the actual disk read part of the writer:
   */
  @Test
  public void testWriterTwice2() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    tw = new DirectoryTaxonomyWriter(indexDir);
    // run fillTaxonomy again - this will try to add the same categories
    // again, and check that we see the same ordinals again, not different
    // ones, and that the number of categories hasn't grown by the new
    // additions
    fillTaxonomy(tw);
    assertEquals(expectedCategories.length, tw.getSize());   
    tw.close();
    indexDir.close();
  }
 
  /**
   * testWriterTwice3 is yet another test which tests creating a taxonomy
   * in two separate writing sessions. This test used to fail because of
   * a bug involving commit(), explained below, and now should succeed.
   */
  @Test
  public void testWriterTwice3() throws Exception {
    Directory indexDir = newDirectory();
    // First, create and fill the taxonomy
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    // Now, open the same taxonomy and add the same categories again.
    // After a few categories, the LuceneTaxonomyWriter implementation
    // will stop looking for each category on disk, and rather read them
    // all into memory and close it's reader. The bug was that it closed
    // the reader, but forgot that it did (because it didn't set the reader
    // reference to null).
    tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    // Add one new category, just to make commit() do something:
    tw.addCategory(new CategoryPath("hi"));
    // Do a commit(). Here was a bug - if tw had a reader open, it should
    // be reopened after the commit. However, in our case the reader should
    // not be open (as explained above) but because it was not set to null,
    // we forgot that, tried to reopen it, and got an AlreadyClosedException.
    tw.commit();
    assertEquals(expectedCategories.length+1, tw.getSize());   
    tw.close();
    indexDir.close();
 
 
  /**  Another set of tests for the writer, which don't use an array and
   *  try to distill the different cases, and therefore may be more helpful
   *  for debugging a problem than testWriter() which is hard to know why
   *  or where it failed.
   */
  @Test
  public void testWriterSimpler() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    assertEquals(1, tw.getSize()); // the root only
    // Test that adding a new top-level category works
    assertEquals(1, tw.addCategory(new CategoryPath("a")));
    assertEquals(2, tw.getSize());
    // Test that adding the same category again is noticed, and the
    // same ordinal (and not a new one) is returned.
    assertEquals(1, tw.addCategory(new CategoryPath("a")));
    assertEquals(2, tw.getSize());
    // Test that adding another top-level category returns a new ordinal,
    // not the same one
    assertEquals(2, tw.addCategory(new CategoryPath("b")));
    assertEquals(3, tw.getSize());
    // Test that adding a category inside one of the above adds just one
    // new ordinal:
    assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
    assertEquals(4, tw.getSize());
    // Test that adding the same second-level category doesn't do anything:
    assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
    assertEquals(4, tw.getSize());
    // Test that adding a second-level category with two new components
    // indeed adds two categories
    assertEquals(5, tw.addCategory(new CategoryPath("d","e")));
    assertEquals(6, tw.getSize());
    // Verify that the parents were added above in the order we expected
    assertEquals(4, tw.addCategory(new CategoryPath("d")));
    // Similar, but inside a category that already exists:
    assertEquals(7, tw.addCategory(new CategoryPath("b", "d","e")));
    assertEquals(8, tw.getSize());
    // And now inside two levels of categories that already exist:
    assertEquals(8, tw.addCategory(new CategoryPath("b", "d","f")));
    assertEquals(9, tw.getSize());
   
    tw.close();
    indexDir.close();
  }
 
  /**  Test writing an empty index, and seeing that a reader finds in it
    the root category, and only it. We check all the methods on that
    root category return the expected results.
   */
  @Test
  public void testRootOnly() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    // right after opening the index, it should already contain the
    // root, so have size 1:
    assertEquals(1, tw.getSize());
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    assertEquals(1, tr.getSize());
    assertEquals(0, tr.getPath(0).length);
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
    assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
    tr.close();
    indexDir.close();
  }

  /**  The following test is exactly the same as testRootOnly, except we
   *  do not close the writer before opening the reader. We want to see
   *  that the root is visible to the reader not only after the writer is
   *  closed, but immediately after it is created.
   */
  @Test
  public void testRootOnly2() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.commit();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    assertEquals(1, tr.getSize());
    assertEquals(0, tr.getPath(0).length);
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
    assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
    tw.close();
    tr.close();
    indexDir.close();
  }

  /**  Basic tests for TaxonomyReader's category <=> ordinal transformations
    (getSize(), getCategory() and getOrdinal()).
    We test that after writing the index, it can be read and all the
    categories and ordinals are there just as we expected them to be.
   */
  @Test
  public void testReaderBasic() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);

    // test TaxonomyReader.getSize():
    assertEquals(expectedCategories.length, tr.getSize());

    // test round trips of ordinal => category => ordinal
    for (int i=0; i<tr.getSize(); i++) {
      assertEquals(i, tr.getOrdinal(tr.getPath(i)));
    }

    // test TaxonomyReader.getCategory():
    for (int i = 1; i < tr.getSize(); i++) {
      CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
      CategoryPath category = tr.getPath(i);
      if (!expectedCategory.equals(category)) {
        fail("For ordinal "+i+" expected category "+
            showcat(expectedCategory)+", but got "+showcat(category));
      }
    }
    //  (also test invalid ordinals:)
    assertNull(tr.getPath(-1));
    assertNull(tr.getPath(tr.getSize()));
    assertNull(tr.getPath(TaxonomyReader.INVALID_ORDINAL));

    // test TaxonomyReader.getOrdinal():
    for (int i = 1; i < expectedCategories.length; i++) {
      int expectedOrdinal = i;
      int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
      if (expectedOrdinal != ordinal) {
        fail("For category "+showcat(expectedCategories[i])+" expected ordinal "+
            expectedOrdinal+", but got "+ordinal);
      }
    }
    // (also test invalid categories:)
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("non-existant")));
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("Author", "Jules Verne")));

    tr.close();
    indexDir.close();
  }

  /**  Tests for TaxonomyReader's getParent() method.
    We check it by comparing its results to those we could have gotten by
    looking at the category string paths (where the parentage is obvious).
    Note that after testReaderBasic(), we already know we can trust the
    ordinal <=> category conversions.
   
    Note: At the moment, the parent methods in the reader are deprecated,
    but this does not mean they should not be tested! Until they are
    removed (*if* they are removed), these tests should remain to see
    that they still work correctly.
   */

  @Test
  public void testReaderParent() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);

    // check that the parent of the root ordinal is the invalid ordinal:
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));

    // check parent of non-root ordinals:
    for (int ordinal=1; ordinal<tr.getSize(); ordinal++) {
      CategoryPath me = tr.getPath(ordinal);
      int parentOrdinal = tr.getParent(ordinal);
      CategoryPath parent = tr.getPath(parentOrdinal);
      if (parent==null) {
        fail("Parent of "+ordinal+" is "+parentOrdinal+
        ", but this is not a valid category.");
      }
      // verify that the parent is indeed my parent, according to the strings
      if (!me.subpath(me.length-1).equals(parent)) {
        fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
            " but categories are "+showcat(parent)+" and "+showcat(me)+
            " respectively.");
      }
    }

    // check parent of of invalid ordinals:
    try {
      tr.getParent(-1);
      fail("getParent for -1 should throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    try {
      tr.getParent(TaxonomyReader.INVALID_ORDINAL);
      fail("getParent for INVALID_ORDINAL should throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    try {
      int parent = tr.getParent(tr.getSize());
      fail("getParent for getSize() should throw exception, but returned "+parent);
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }

    tr.close();
    indexDir.close();
  }
 
  /**
   * Tests for TaxonomyWriter's getParent() method. We check it by comparing
   * its results to those we could have gotten by looking at the category
   * string paths using a TaxonomyReader (where the parentage is obvious).
   * Note that after testReaderBasic(), we already know we can trust the
   * ordinal <=> category conversions from TaxonomyReader.
   *
   * The difference between testWriterParent1 and testWriterParent2 is that
   * the former closes the taxonomy writer before reopening it, while the
   * latter does not.
   *
   * This test code is virtually identical to that of testReaderParent().
   */
  @Test
  public void testWriterParent1() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    tw = new DirectoryTaxonomyWriter(indexDir);
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
   
    checkWriterParent(tr, tw);
   
    tw.close();
    tr.close();
    indexDir.close();
  }

  @Test
  public void testWriterParent2() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.commit();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
   
    checkWriterParent(tr, tw);
   
    tw.close();
    tr.close();
    indexDir.close();
  }
 
  private void checkWriterParent(TaxonomyReader tr, TaxonomyWriter tw) throws Exception {
    // check that the parent of the root ordinal is the invalid ordinal:
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(0));

    // check parent of non-root ordinals:
    for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
      CategoryPath me = tr.getPath(ordinal);
      int parentOrdinal = tw.getParent(ordinal);
      CategoryPath parent = tr.getPath(parentOrdinal);
      if (parent == null) {
        fail("Parent of " + ordinal + " is " + parentOrdinal
            + ", but this is not a valid category.");
      }
      // verify that the parent is indeed my parent, according to the
      // strings
      if (!me.subpath(me.length - 1).equals(parent)) {
        fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
            + " but categories are " + showcat(parent) + " and "
            + showcat(me) + " respectively.");
      }
    }

    // check parent of of invalid ordinals:
    try {
      tw.getParent(-1);
      fail("getParent for -1 should throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    try {
      tw.getParent(TaxonomyReader.INVALID_ORDINAL);
      fail("getParent for INVALID_ORDINAL should throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    try {
      int parent = tw.getParent(tr.getSize());
      fail("getParent for getSize() should throw exception, but returned "
          + parent);
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
  }

  /**  Tests TaxonomyReader's getParentArray() method. We do not test this
    method directly, but rather just compare its results to those from
    other methods (which we have already tested above).
   */
  @Test
  public void testReaderParentArray() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    int[] parents = tr.getParallelTaxonomyArrays().parents();
    assertEquals(tr.getSize(), parents.length);
    for (int i=0; i<tr.getSize(); i++) {
      assertEquals(tr.getParent(i), parents[i]);
    }
    tr.close();
    indexDir.close();
  }
 
  /**
   * Test TaxonomyReader's child browsing method, getChildrenArrays()
   * This only tests for correctness of the data on one example - we have
   * below further tests on data refresh etc.
   */
  @Test
  public void testChildrenArrays() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
    int[] youngestChildArray = ca.children();
    assertEquals(tr.getSize(), youngestChildArray.length);
    int[] olderSiblingArray = ca.siblings();
    assertEquals(tr.getSize(), olderSiblingArray.length);
    for (int i=0; i<expectedCategories.length; i++) {
      // find expected children by looking at all expectedCategories
      // for children
      ArrayList<Integer> expectedChildren = new ArrayList<Integer>();
      for (int j=expectedCategories.length-1; j>=0; j--) {
        if (expectedCategories[j].length != expectedCategories[i].length+1) {
          continue; // not longer by 1, so can't be a child
        }
        boolean ischild=true;
        for (int k=0; k<expectedCategories[i].length; k++) {
          if (!expectedCategories[j][k].equals(expectedCategories[i][k])) {
            ischild=false;
            break;
          }
        }
        if (ischild) {
          expectedChildren.add(j);
        }
      }
      // check that children and expectedChildren are the same, with the
      // correct reverse (youngest to oldest) order:
      if (expectedChildren.size()==0) {
        assertEquals(TaxonomyReader.INVALID_ORDINAL, youngestChildArray[i]);
      } else {
        int child = youngestChildArray[i];
        assertEquals(expectedChildren.get(0).intValue(),
            child);
        for (int j=1; j<expectedChildren.size(); j++) {
          child = olderSiblingArray[child];
          assertEquals(expectedChildren.get(j).intValue(),
              child);
          // if child is INVALID_ORDINAL we should stop, but
          // assertEquals would fail in this case anyway.
        }
        // When we're done comparing, olderSiblingArray should now point
        // to INVALID_ORDINAL, saying there are no more children. If it
        // doesn't, we found too many children...
        assertEquals(-1, olderSiblingArray[child]);
      }
    }
    tr.close();
    indexDir.close();
  }

  /**
   * Similar to testChildrenArrays, except rather than look at
   * expected results, we test for several "invariants" that the results
   * should uphold, e.g., that a child of a category indeed has this category
   * as its parent. This sort of test can more easily be extended to larger
   * example taxonomies, because we do not need to build the expected list
   * of categories like we did in the above test.
   */
  @Test
  public void testChildrenArraysInvariants() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    tw.close();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
    int[] children = ca.children();
    assertEquals(tr.getSize(), children.length);
    int[] olderSiblingArray = ca.siblings();
    assertEquals(tr.getSize(), olderSiblingArray.length);
       
    // test that the "youngest child" of every category is indeed a child:
    for (int i=0; i<tr.getSize(); i++) {
      int youngestChild = children[i];
      if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
        assertEquals(i, tr.getParent(youngestChild));
      }
    }
       
    // test that the "older sibling" of every category is indeed older (lower)
    // (it can also be INVALID_ORDINAL, which is lower than any ordinal)
    for (int i=0; i<tr.getSize(); i++) {
      assertTrue("olderSiblingArray["+i+"] should be <"+i, olderSiblingArray[i] < i);
    }
   
    // test that the "older sibling" of every category is indeed a sibling
    // (they share the same parent)
    for (int i=0; i<tr.getSize(); i++) {
      int sibling = olderSiblingArray[i];
      if (sibling == TaxonomyReader.INVALID_ORDINAL) {
        continue;
      }
      assertEquals(tr.getParent(i), tr.getParent(sibling));
    }
   
    // And now for slightly more complex (and less "invariant-like"...)
    // tests:
   
    // test that the "youngest child" is indeed the youngest (so we don't
    // miss the first children in the chain)
    for (int i=0; i<tr.getSize(); i++) {
      // Find the really youngest child:
      int j;
      for (j=tr.getSize()-1; j>i; j--) {
        if (tr.getParent(j)==i) {
          break; // found youngest child
        }
      }
      if (j==i) { // no child found
        j=TaxonomyReader.INVALID_ORDINAL;
      }
      assertEquals(j, children[i]);
    }

    // test that the "older sibling" is indeed the least oldest one - and
    // not a too old one or -1 (so we didn't miss some children in the
    // middle or the end of the chain).
    for (int i=0; i<tr.getSize(); i++) {
      // Find the youngest older sibling:
      int j;
      for (j=i-1; j>=0; j--) {
        if (tr.getParent(j)==tr.getParent(i)) {
          break; // found youngest older sibling
        }
      }
      if (j<0) { // no sibling found
        j=TaxonomyReader.INVALID_ORDINAL;
      }
      assertEquals(j, olderSiblingArray[i]);
    }
 
    tr.close();
    indexDir.close();
  }
 
  /**
   * Test how getChildrenArrays() deals with the taxonomy's growth:
   */
  @Test
  public void testChildrenArraysGrowth() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.addCategory(new CategoryPath("hi", "there"));
    tw.commit();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
    assertEquals(3, tr.getSize());
    assertEquals(3, ca.siblings().length);
    assertEquals(3, ca.children().length);
    assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.children()));
    assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.siblings()));
    tw.addCategory(new CategoryPath("hi", "ho"));
    tw.addCategory(new CategoryPath("hello"));
    tw.commit();
    // Before refresh, nothing changed..
    ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
    assertSame(newca, ca); // we got exactly the same object
    assertEquals(3, tr.getSize());
    assertEquals(3, ca.siblings().length);
    assertEquals(3, ca.children().length);
    // After the refresh, things change:
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
    assertNotNull(newtr);
    tr.close();
    tr = newtr;
    ca = tr.getParallelTaxonomyArrays();
    assertEquals(5, tr.getSize());
    assertEquals(5, ca.siblings().length);
    assertEquals(5, ca.children().length);
    assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.children()));
    assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.siblings()));
    tw.close();
    tr.close();
    indexDir.close();
  }
 
  // Test that getParentArrays is valid when retrieved during refresh
  @Test
  public void testTaxonomyReaderRefreshRaces() throws Exception {
    // compute base child arrays - after first chunk, and after the other
    Directory indexDirBase = newDirectory();
    TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
    twBase.addCategory(new CategoryPath("a", "0"));
    final CategoryPath abPath = new CategoryPath("a", "b");
    twBase.addCategory(abPath);
    twBase.commit();
    TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);

    final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
   
    final int abOrd = trBase.getOrdinal(abPath);
    final int abYoungChildBase1 = ca1.children()[abOrd];
   
    final int numCategories = atLeast(800);
    for (int i = 0; i < numCategories; i++) {
      twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
    }
    twBase.close();
   
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase);
    assertNotNull(newTaxoReader);
    trBase.close();
    trBase = newTaxoReader;
   
    final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
    final int abYoungChildBase2 = ca2.children()[abOrd];
   
    int numRetries = atLeast(50);
    for (int retry = 0; retry < numRetries; retry++) {
      assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
    }
   
    trBase.close();
    indexDirBase.close();
  }

  private void assertConsistentYoungestChild(final CategoryPath abPath,
      final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories)
      throws Exception {
    SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for intialization
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.addCategory(new CategoryPath("a", "0"));
    tw.addCategory(abPath);
    tw.commit();
   
    final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    for (int i = 0; i < numCategories; i++) {
      final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
      tw.addCategory(cp);
      assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
    }
    tw.close();
   
    final AtomicBoolean stop = new AtomicBoolean(false);
    final Throwable[] error = new Throwable[] { null };
    final int retrieval[] = { 0 };
   
    Thread thread = new Thread("Child Arrays Verifier") {
      @Override
      public void run() {
        setPriority(1 + getPriority());
        try {
          while (!stop.get()) {
            int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
            assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
            assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
            sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
          }
        } catch (Throwable e) {
          error[0] = e;
          stop.set(true);
        }
      }

      private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
        final int abYoungChild = ca.children()[abOrd];
        assertTrue(
            "Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
            ") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
            abYoungChildBase1==abYoungChild ||
            abYoungChildBase2==ca.children()[abOrd]);
      }
    };
    thread.start();
   
    indexDir.setSleepMillis(1); // some delay for refresh
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    if (newTaxoReader != null) {
      newTaxoReader.close();
    }
   
    stop.set(true);
    thread.join();
    assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
   
    tr.close();
  }

  /** Grab the stack trace into a string since the exception was thrown in a thread and we want the assert
   * outside the thread to show the stack trace in case of failure.   */
  private String stackTraceStr(final Throwable error) {
    if (error == null) {
      return "";
    }
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    error.printStackTrace(pw);
    pw.close();
    return sw.toString();
  }
 
  /**  Test that if separate reader and writer objects are opened, new
    categories written into the writer are available to a reader only
    after a commit().
    Note that this test obviously doesn't cover all the different
    concurrency scenarios, all different methods, and so on. We may
    want to write more tests of this sort.

    This test simulates what would happen when there are two separate
    processes, one doing indexing, and the other searching, and each opens
    its own object (with obviously no connection between the objects) using
    the same disk files. Note, though, that this test does not test what
    happens when the two processes do their actual work at exactly the same
    time.
    It also doesn't test multi-threading.
   */
  @Test
  public void testSeparateReaderAndWriter() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.commit();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);

    int author = 1;

    // getParent() and getSize() test:
    try {
      tr.getParent(author);
      fail("Initially, getParent for "+author+" should throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
    tw.addCategory(new CategoryPath("Author"));
    try {
      tr.getParent(author);
      fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    assertEquals(1, tr.getSize()); // still root only...
    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
    try {
      tr.getParent(author);
      fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    assertEquals(1, tr.getSize()); // still root only...
    tw.commit();
    try {
      tr.getParent(author);
      fail("Before refresh(), getParent for "+author+" should still throw exception");
    } catch (ArrayIndexOutOfBoundsException e) {
      // ok
    }
    assertEquals(1, tr.getSize()); // still root only...
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    assertNotNull(newTaxoReader);
    tr.close();
    tr = newTaxoReader;
   
    try {
      assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
      // ok
    } catch (ArrayIndexOutOfBoundsException e) {
      fail("After category addition, commit() and refresh(), getParent for "+author+" should NOT throw exception");
    }
    assertEquals(2, tr.getSize()); // finally, see there are two categories

    // now, add another category, and verify that after commit and refresh
    // the parent of this category is correct (this requires the reader
    // to correctly update its prefetched parent vector), and that the
    // old information also wasn't ruined:
    tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
    int dawkins = 2;
    tw.commit();
    newTaxoReader = TaxonomyReader.openIfChanged(tr);
    assertNotNull(newTaxoReader);
    tr.close();
    tr = newTaxoReader;
    assertEquals(author, tr.getParent(dawkins));
    assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL));
    assertEquals(3, tr.getSize());
    tw.close();
    tr.close();
    indexDir.close();
  }
 
  @Test
  public void testSeparateReaderAndWriter2() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.commit();
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);

    // Test getOrdinal():
    CategoryPath author = new CategoryPath("Author");

    assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
    tw.addCategory(author);
    // before commit and refresh, no change:
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
    assertEquals(1, tr.getSize()); // still root only...
    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
    assertEquals(1, tr.getSize()); // still root only...
    tw.commit();
    // still not enough before refresh:
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
    assertEquals(1, tr.getSize()); // still root only...
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
    assertNotNull(newTaxoReader);
    tr.close();
    tr = newTaxoReader;
    assertEquals(1, tr.getOrdinal(author));
    assertEquals(2, tr.getSize());
    tw.close();
    tr.close();
    indexDir.close();
  }
 
  /**
   * Test what happens if we try to write to a locked taxonomy writer,
   * and see that we can unlock it and continue.
   */
  @Test
  public void testWriterLock() throws Exception {
    // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
    Directory indexDir = new RAMDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    tw.addCategory(new CategoryPath("hi", "there"));
    tw.commit();
    // we deliberately not close the write now, and keep it open and
    // locked.
    // Verify that the writer worked:
    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
    assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
    // Try to open a second writer, with the first one locking the directory.
    // We expect to get a LockObtainFailedException.
    try {
      assertNull(new DirectoryTaxonomyWriter(indexDir));
      fail("should have failed to write in locked directory");
    } catch (LockObtainFailedException e) {
      // this is what we expect to happen.
    }
    // Remove the lock, and now the open should succeed, and we can
    // write to the new writer.
    DirectoryTaxonomyWriter.unlock(indexDir);
    TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
    tw2.addCategory(new CategoryPath("hey"));
    tw2.close();
    // See that the writer indeed wrote:
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
    assertNotNull(newtr);
    tr.close();
    tr = newtr;
    assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
    tr.close();
    tw.close();
    indexDir.close();
  }
 
  /**
   * fillTaxonomyCheckPaths adds the categories in the categories[] array,
   * and asserts that the additions return exactly paths specified in
   * expectedPaths[]. This is the same add fillTaxonomy() but also checks
   * the correctness of getParent(), not just addCategory().
   * Note that this assumes that fillTaxonomyCheckPaths() is called on an empty
   * taxonomy index. Calling it after something else was already added to the
   * taxonomy index will surely have this method fail.
   */
  public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
    for (int i = 0; i < categories.length; i++) {
      int ordinal = tw.addCategory(new CategoryPath(categories[i]));
      int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
      if (ordinal!=expectedOrdinal) {
        fail("For category "+showcat(categories[i])+" expected ordinal "+
            expectedOrdinal+", but got "+ordinal);
      }
      for (int j=expectedPaths[i].length-2; j>=0; j--) {
        ordinal = tw.getParent(ordinal);
        expectedOrdinal = expectedPaths[i][j];
        if (ordinal!=expectedOrdinal) {
          fail("For category "+showcat(categories[i])+" expected ancestor level "+
              (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
              ", but got "+ordinal);
        }
      }   
    }
  }
 
  // After fillTaxonomy returned successfully, checkPaths() checks that
  // the getParent() calls return as expected, from the table
  public static void checkPaths(TaxonomyWriter tw) throws IOException {
    for (int i = 0; i < categories.length; i++) {
      int ordinal = expectedPaths[i][expectedPaths[i].length-1];
      for (int j=expectedPaths[i].length-2; j>=0; j--) {
        ordinal = tw.getParent(ordinal);
        int expectedOrdinal = expectedPaths[i][j];
        if (ordinal!=expectedOrdinal) {
          fail("For category "+showcat(categories[i])+" expected ancestor level "+
              (expectedPaths[i].length-1-j)+" was "+expectedOrdinal+
              ", but got "+ordinal);
        }
      }
      assertEquals(TaxonomyReader.ROOT_ORDINAL, tw.getParent(expectedPaths[i][0]));
    }
    assertEquals(TaxonomyReader.INVALID_ORDINAL, tw.getParent(TaxonomyReader.ROOT_ORDINAL));
  }
 
  /**
   * Basic test for TaxonomyWriter.getParent(). This is similar to testWriter
   * above, except we also check the parents of the added categories, not just
   * the categories themselves.
   */
  @Test
  public void testWriterCheckPaths() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomyCheckPaths(tw);
    // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
    // is what we expect it to be.
    assertEquals(expectedCategories.length, tw.getSize());
    tw.close();
    indexDir.close();
  }
 
  /**
   * testWriterCheckPaths2 is the path-checking variant of testWriterTwice
   * and testWriterTwice2. After adding all the categories, we add them again,
   * and see that we get the same old ids and paths. We repeat the path checking
   * yet again after closing and opening the index for writing again - to see
   * that the reading of existing data from disk works as well.
   */
  @Test
  public void testWriterCheckPaths2() throws Exception {
    Directory indexDir = newDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
    fillTaxonomy(tw);
    checkPaths(tw);
    fillTaxonomy(tw);
    checkPaths(tw);
    tw.close();

    tw = new DirectoryTaxonomyWriter(indexDir);
    checkPaths(tw);
    fillTaxonomy(tw);
    checkPaths(tw);
    tw.close();
    indexDir.close();
  }

  @Test
  public void testNRT() throws Exception {
    Directory dir = newDirectory();
    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
   
    CategoryPath cp = new CategoryPath("a");
    writer.addCategory(cp);
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
    assertNotNull("expected a new instance", newReader);
    assertEquals(2, newReader.getSize());
    assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp));
    reader.close();
    reader = newReader;
   
    writer.close();
    reader.close();
   
    dir.close();
  }

//  TODO (Facet): test multiple readers, one writer. Have the multiple readers
//  using the same object (simulating threads) or different objects
//  (simulating processes).
}
TOP

Related Classes of org.apache.lucene.facet.taxonomy.TestTaxonomyCombined

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.